#!/usr/bin/env bash
#
# Runs FlagAI's preprocess_data_args.py on a single input file.
#
# Required variables (set them in the environment, or replace the
# placeholders below before running):
#   YOUR_FLAGAI_HOME     - FlagAI checkout root; exported as PYTHONPATH
#   YOUR_TOKENIZER_DIR   - passed to the tool as --model-dir
#   YOUR_TOKENIZER_NAME  - passed to the tool as --model-name
#   YOUR_INPUT_FILE      - passed to the tool as --input
#   YOUR_OUTPUT_PREFIX   - passed to the tool as --output-prefix
#
# The script's exit status is the exit status of the python command.

# Fail fast with a clear message instead of handing python a mangled
# argument list: an empty value would otherwise make the following flag be
# consumed as the value of the preceding one.
: "${YOUR_FLAGAI_HOME:?YOUR_FLAGAI_HOME must be set to the FlagAI checkout root}"
: "${YOUR_TOKENIZER_DIR:?YOUR_TOKENIZER_DIR must be set}"
: "${YOUR_TOKENIZER_NAME:?YOUR_TOKENIZER_NAME must be set}"
: "${YOUR_INPUT_FILE:?YOUR_INPUT_FILE must be set}"
: "${YOUR_OUTPUT_PREFIX:?YOUR_OUTPUT_PREFIX must be set}"

# NOTE: this replaces (does not extend) any PYTHONPATH already in the
# environment; the tool path below relies on it being a single directory.
export PYTHONPATH="$YOUR_FLAGAI_HOME"

PREPROCESS_DATA_TOOL="$PYTHONPATH/flagai/data/dataset/indexed_dataset/preprocess_data_args.py"
TOKENIZER_DIR="$YOUR_TOKENIZER_DIR"
TOKENIZER_NAME="$YOUR_TOKENIZER_NAME"

INPUT_FILE="$YOUR_INPUT_FILE"
FULL_OUTPUT_PREFIX="$YOUR_OUTPUT_PREFIX"

# Show which tokenizer is in use; printf is safe for values starting with '-'.
printf '%s\n' "$TOKENIZER_NAME"

# All expansions are quoted so paths containing spaces or glob characters
# reach the tool as single arguments.
python "$PREPROCESS_DATA_TOOL" \
    --input "$INPUT_FILE" \
    --output-prefix "$FULL_OUTPUT_PREFIX" \
    --workers 4 --chunk-size 256 \
    --model-name "$TOKENIZER_NAME" \
    --model-dir "$TOKENIZER_DIR"
